# import dependencies
import os
import numpy as np
import pandas as pd
import scanpy as sc
import loompy as lp
from MulticoreTSNE import MulticoreTSNE as TSNE
import json
import base64
import zlib
# set variables for file paths to read from and write to:
# set a working directory
wdir = "/ddn1/vol1/staging/leuven/stg_00002/lcb/cflerin/testruns/scenic-protocol/prottest3/"
os.chdir( wdir )
# path to loom output, generated from a combination of Scanpy and pySCENIC results:
f_final_loom = 'pbmc10k_scenic_integrated-output.loom'
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=120)
# scenic output
lf = lp.connect( f_final_loom, mode='r', validate=False )
meta = json.loads(zlib.decompress(base64.b64decode( lf.attrs.MetaData )))
#exprMat = pd.DataFrame( lf[:,:], index=lf.ra.Gene, columns=lf.ca.CellID)
auc_mtx = pd.DataFrame( lf.ca.RegulonsAUC, index=lf.ca.CellID)
regulons = lf.ra.Regulons
###
cellAnnot = pd.concat(
[
pd.DataFrame( lf.ca.Celltype_Garnett, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.ClusterID, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.Louvain_clusters_Scanpy, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.Percent_mito, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.nGene, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.nUMI, index=lf.ca.CellID ),
],
axis=1
)
cellAnnot.columns = [
'Celltype_Garnett',
'ClusterID',
'Louvain_clusters_Scanpy',
'Percent_mito',
'nGene',
'nUMI']
lf.close()
from pyscenic.rss import regulon_specificity_scores
from pyscenic.plotting import plot_rss
import matplotlib.pyplot as plt
from adjustText import adjust_text
import seaborn as sns
from pyscenic.binarization import binarize
rss_cellType = regulon_specificity_scores( auc_mtx, cellAnnot['Celltype_Garnett'] )
rss_cellType
cats = sorted(list(set(cellAnnot['Celltype_Garnett'])))
fig = plt.figure(figsize=(15, 8))
for c,num in zip(cats, range(1,len(cats)+1)):
x=rss_cellType.T[c]
ax = fig.add_subplot(2,5,num)
plot_rss(rss_cellType, c, top_n=5, max_n=None, ax=ax)
ax.set_ylim( x.min()-(x.max()-x.min())*0.05 , x.max()+(x.max()-x.min())*0.05 )
for t in ax.texts:
t.set_fontsize(12)
ax.set_ylabel('')
ax.set_xlabel('')
adjust_text(ax.texts, autoalign='xy', ha='right', arrowprops=dict(arrowstyle='-',color='lightgrey'), precision=0.001 )
fig.text(0.5, 0.0, 'Regulon', ha='center', va='center', size='x-large')
fig.text(0.00, 0.5, 'Regulon specificity score (RSS)', ha='center', va='center', rotation='vertical', size='x-large')
plt.tight_layout()
plt.rcParams.update({
'figure.autolayout': True,
'figure.titlesize': 'large' ,
#'axes.labelsize': 'x-large',
#'axes.titlesize':'x-large',
'xtick.labelsize':'large',
'ytick.labelsize':'large'
})
plt.savefig("PBMC10k_cellType-RSS-top5.png", dpi=150, bbox_inches = "tight")
plt.show()
topreg = []
for i,c in enumerate(cats):
topreg.extend(
list(rss_cellType.T[c].sort_values(ascending=False)[:5].index)
)
topreg = list(set(topreg))
Aggregate the AUC matrix by cell type:
auc_mtx_celltype = auc_mtx.groupby(cellAnnot['Celltype_Garnett'],axis=0).mean()
auc_mtx_celltype
fig, ax = plt.subplots(1, 1, figsize = (8,8), dpi=300)
sns.heatmap(auc_mtx_celltype[topreg], ax=ax, annot=True, fmt=".2f", linewidths=.7, cbar=False, square=True, linecolor='gray',
cmap="YlGnBu", annot_kws={"size": 5})
ax.tick_params(axis='both', which='major', pad=-2)
sns.set(font_scale=0.7)
ax.set_ylabel('')
ax.set_xlabel('')
plt.tight_layout()
plt.savefig("PBMC10k_cellType-AUC-heatmap-top5.png", dpi=150, bbox_inches = "tight")
plt.show()
g = sns.clustermap(auc_mtx_celltype[topreg], annot=True, fmt=".2f", linewidths=.7, cbar=False, square=False, linecolor='gray',
cmap="YlGnBu", annot_kws={"size": 6}, figsize=(12,5) )
sns.set(font_scale=0.7)
g.cax.set_visible(False)
g.ax_heatmap.set_ylabel('')
g.ax_heatmap.set_xlabel('')
binary_mtx_celltype, auc_thresholds = binarize( auc_mtx_celltype[ topreg ], num_workers=25 )
#binary_mtx_celltype = binary_mtx_celltype.astype('category')
binary_mtx_celltype
fig, ax = plt.subplots(1, 1, figsize = (8,8), dpi=300)
sns.heatmap(binary_mtx_celltype, ax=ax, annot=True, linewidths=.7, cbar=False, square=True, linecolor='gray',
cmap="YlGnBu", annot_kws={"size": 5})
ax.tick_params(axis='both', which='major', pad=-2)
sns.set(font_scale=0.7)
ax.set_ylabel('')
ax.set_xlabel('')
plt.savefig("PBMC10k_binary-heatmap-celltype-top5.png", dpi=150, bbox_inches = "tight")
plt.tight_layout()
plt.show()
rss_louvain = regulon_specificity_scores( auc_mtx, cellAnnot['Louvain_clusters_Scanpy'] )
rss_louvain
cats = sorted( list(set(cellAnnot['Louvain_clusters_Scanpy'])), key=int )
fig = plt.figure(figsize=(15, 12))
for c,num in zip(cats, range(1,len(cats)+1)):
x=rss_louvain.T[c]
ax = fig.add_subplot(3,5,num)
plot_rss(rss_louvain, c, top_n=5, max_n=None, ax=ax)
ax.set_ylim( x.min()-(x.max()-x.min())*0.05 , x.max()+(x.max()-x.min())*0.05 )
for t in ax.texts:
t.set_fontsize(12)
ax.set_ylabel('')
ax.set_xlabel('')
adjust_text(ax.texts, autoalign='xy', ha='right', arrowprops=dict(arrowstyle='-',color='lightgrey'), precision=0.001 )
fig.text(0.5, 0.0, 'Regulon', ha='center', va='center', size='x-large')
fig.text(0.00, 0.5, 'Regulon specificity score (RSS)', ha='center', va='center', rotation='vertical', size='x-large')
plt.tight_layout()
plt.rcParams.update({
'figure.autolayout': True,
'figure.titlesize': 'large' ,
#'axes.labelsize': 'x-large',
#'axes.titlesize':'x-large',
'xtick.labelsize':'large',
'ytick.labelsize':'large'
})
plt.savefig("PBMC10k_Louvain-RSS-top5.png", dpi=150, bbox_inches = "tight")
plt.show()
topreg = []
for i,c in enumerate(cats):
topreg.extend(
list(rss_louvain.T[c].sort_values(ascending=False)[:5].index)
)
topreg = list(set(topreg))
Aggregate the AUC matrix by cell type:
auc_mtx_louvain = auc_mtx.groupby(cellAnnot['Louvain_clusters_Scanpy'],axis=0).mean()
auc_mtx_louvain
fig, ax = plt.subplots(1, 1, figsize = (8,12), dpi=300)
sns.heatmap(auc_mtx_louvain[topreg], ax=ax, annot=True, fmt=".2f", linewidths=.7, cbar=False, square=True, linecolor='gray',
cmap="YlGnBu", annot_kws={"size": 5})
ax.tick_params(axis='both', which='major', pad=-2)
sns.set(font_scale=0.7)
ax.set_ylabel('')
ax.set_xlabel('')
plt.tight_layout()
plt.savefig("PBMC10k_Louvain-AUC-heatmap-top5.png", dpi=150, bbox_inches = "tight")
plt.show()
g = sns.clustermap(auc_mtx_louvain[topreg], annot=True, fmt=".2f", linewidths=.7, cbar=False, square=False, linecolor='gray',
cmap="YlGnBu", annot_kws={"size": 6}, figsize=(14,7) )
sns.set(font_scale=0.7)
g.cax.set_visible(False)
g.ax_heatmap.set_ylabel('')
g.ax_heatmap.set_xlabel('')
binary_mtx_louvain, auc_thresholds = binarize( auc_mtx_louvain[ topreg ], num_workers=25 )
binary_mtx_louvain
fig, ax = plt.subplots(1, 1, figsize = (8,8), dpi=300)
sns.heatmap(binary_mtx_louvain, ax=ax, annot=True, linewidths=.7, cbar=False, square=True, linecolor='gray',
cmap="YlGnBu", annot_kws={"size": 5})
ax.tick_params(axis='both', which='major', pad=-2)
sns.set(font_scale=0.7)
ax.set_ylabel('')
ax.set_xlabel('')
plt.tight_layout()
plt.show()